1.11 Required Packages
library("leaflet")
library(readr)
library(dplyr)
library(RColorBrewer)
library(Hmisc)
library(ggplot2)
library(formatR)
fuk2011 <- read.csv("jun_2011_fukushima.csv")
fuk2013 <- read.csv("nov_2013_fukushima.csv")
names(fuk2011) <- c("gridcode", "pref", "city", "gridCenterNorthlat",
"gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec",
"daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat",
"NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong",
"SE_nLat", "SE_eLong")
names(fuk2013) <- c("gridcode", "pref", "city", "gridCenterNorthlat",
"gridCenterEastlng", "gridCenterNorthlatDec", "gridCenterEastlngDec",
"daichi_distance", "no_samples", "AvgAirDoseRate", "NE_nLat",
"NE_eLong", "NW_nLat", "NW_eLong", "SW_nLat", "SW_eLong",
"SE_nLat", "SE_eLong")
dim(fuk2013)
## [1] 119522 18
dim(fuk2011)
## [1] 45273 18
max(fuk2013$AvgAirDoseRate)
## [1] 39
min(fuk2013$AvgAirDoseRate)
## [1] 0.009
max(fuk2011$AvgAirDoseRate)
## [1] 37
min(fuk2011$AvgAirDoseRate)
## [1] 0.008
fuk2011_q <- fuk2011 %>% mutate(dose_quants = cut2(fuk2011$AvgAirDoseRate,
cuts = c(0.1, 1, 5, 15, 25, 35, 45), levels.mean = TRUE))
fuk2013_q <- fuk2013 %>% mutate(dose_quants = cut2(fuk2013$AvgAirDoseRate,
cuts = c(0.1, 1, 5, 15, 25, 35, 45), levels.mean = TRUE))
iro <- colorFactor(palette = "YlOrRd", domain = fuk2011_q$dose_quants)
iro2013 <- colorFactor(palette = "YlOrRd", domain = fuk2013_q$dose_quants)
# Link of Daichi
fukulink <- paste(sep = "<br/>", "<br><a href='http://www.tepco.co.jp
/en/decommision/index-e.html'>Fukushima Daichi</a></b>",
"Source of radiations")
fuk2011_plot <- leaflet() %>% addTiles() %>% addRectangles(data = fuk2011_q,
lng1 = ~SW_eLong, lat1 = ~SW_nLat, lng2 = ~NE_eLong, lat2 = ~NE_nLat,
color = ~iro(fuk2011_q$dose_quants)) %>% addLegend("bottomright",
pal = iro, values = fuk2011_q$dose_quants, title = "AvgAirDoseRate",
labFormat = labelFormat(prefix = "µSv/h "), opacity = 1) %>%
addPopups(lat = 37.4211, lng = 141.0328, popup = fukulink,
options = popupOptions(closeButton = TRUE))
fuk2011_plot
fuk2013_plot <- leaflet() %>%
addTiles()%>%
addRectangles(data = fuk2013_q,lng1 = ~SW_eLong, lat1 = ~SW_nLat,
lng2 = ~NE_eLong, lat2 = ~NE_nLat,
color = ~iro2013(fuk2013_q$dose_quants))%>%
addLegend("bottomright", pal = iro2013, values = fuk2013_q$dose_quants,
title = "AvgAirDoseRate",
labFormat = labelFormat(prefix = "µSv/h "),
opacity = 1)%>%
addPopups(lat = 37.4211, lng = 141.0328,popup = fukulink,
options = popupOptions(closeButton = TRUE))
fuk2013_plot
ggplot(fuk2011_q, aes(daichi_distance,AvgAirDoseRate)) +
geom_point() +
geom_smooth(se = FALSE)+
ggtitle("AvgAirDose against Distance to Daichi Plant")
ggplot(data = fuk2013_q) +
geom_bar(mapping = aes(x = daichi_distance, fill = dose_quants), width = 1)+
ggtitle("AvgAirDose Measured Counts against Daichi Distance")
\[AvgAirDoseRate = \beta_{0} + \beta_{1} daichi distance + \epsilon_{i}\]
fit1 <- lm(AvgAirDoseRate~daichi_distance, data = fuk2011)
summary(fit1)
##
## Call:
## lm(formula = AvgAirDoseRate ~ daichi_distance, data = fuk2011)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.226 -0.901 -0.321 0.165 34.291
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.7838704 0.0267272 104.16 <2e-16 ***
## daichi_distance -0.0288916 0.0004048 -71.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.278 on 45271 degrees of freedom
## Multiple R-squared: 0.1012, Adjusted R-squared: 0.1011
## F-statistic: 5095 on 1 and 45271 DF, p-value: < 2.2e-16
#Confidence interval
confint(fit1)
## 2.5 % 97.5 %
## (Intercept) 2.73148462 2.83625613
## daichi_distance -0.02968495 -0.02809831
\[AvgAirDoseRate = \beta_{0} + \beta_{1} popn density + \beta_{2} human activities + \epsilon_{i}\]
ChallengeThe Air Dose Rate measurements were collected from three sources:
What constitutes Land Use?:
Population density and land usage is measured on a 500m² basis
fuk_pop <- read.csv("fuk.csv")
View(fuk_pop)
# Convert mesh gridecode to lat/long
mesh_latlong <- function (mesh, loc = "C")
{
mesh <- as.character(mesh)
if (length(grep("^[0-9]{4}", mesh)) == 1) {
mesh12 <- as.numeric(substring(mesh, 1, 2))
mesh34 <- as.numeric(substring(mesh, 3, 4))
lat_width <- 2 / 3;
long_width <- 1;
}
else {
return(NULL)
}
if (length(grep("^[0-9]{6}", mesh)) == 1) {
mesh5 <- as.numeric(substring(mesh, 5, 5))
mesh6 <- as.numeric(substring(mesh, 6, 6))
lat_width <- lat_width / 8;
long_width <- long_width / 8;
}
if (length(grep("^[0-9]{8}", mesh)) == 1) {
mesh7 <- as.numeric(substring(mesh, 7, 7))
mesh8 <- as.numeric(substring(mesh, 8, 8))
lat_width <- lat_width / 10;
long_width <- long_width / 10;
}
lat <- mesh12 * 2 / 3;
long <- mesh34 + 100;
if (exists("mesh5") && exists("mesh6")) {
lat <- lat + (mesh5 * 2 / 3) / 8;
long <- long + mesh6 / 8;
}
if (exists("mesh7") && exists("mesh8")) {
lat <- lat + (mesh7 * 2 / 3) / 8 / 10;
long <- long + mesh8 / 8 / 10;
}
if (loc == "C") {
lat <- lat + lat_width / 2;
long <- long + long_width / 2;
}
if (length(grep("N", loc)) == 1) {
lat <- lat + lat_width;
}
if (length(grep("E", loc) == 1)) {
long <- long +long_width;
}
lat <- sprintf("%.8f", lat);
long <- sprintf("%.8f", long);
x <- list(as.numeric(lat), as.numeric(long))
names(x) <- c("lat", "long")
return (x)
}
# j <- mesh_latlong(mesh = 564000463 , loc = "C")
# class(j);print(j);length(j)
grides <- fuk_pop[,1]
head(grides);class(grides);length(grides)
## [1] 564000194 564000364 564000382 564000393 564000462 564000463
## [1] "integer"
## [1] 10831
grides[2]
## [1] 564000364
#create the lat/long
mylist <- list()
for (i in 1:length(grides) ){
lis <- mesh_latlong(mesh = grides[i], loc = "C")
mylist[[i]] <- lis
}
res <- as.data.frame(mylist)
#test
# df <- data.frame(matrix(unlist(res), nrow=10831, byrow=T))
# df <- ldply (res, data.frame)
library(data.table)
df <- as.data.frame(rbindlist(mylist, fill=TRUE))
df[,"gridcode"] <- grides
View(df)
# merge gridcode and lat/lon datasets
fuk_ll <- merge(fuk_pop, df, by.x = "gridcode", by.y = "gridcode", all = TRUE)
write.csv(fuk_ll, file = "fuk_ll.csv")
#trial plots
pop_plot <- leaflet() %>%
addTiles()%>%
addPolylines(data = fuk_ll,lng = ~long, lat = ~lat)
pop_plot
#remove duplicate lat/lon
uniq <- unique(x=df[,-3])
uniq_plot <- leaflet() %>%
addTiles()%>%
addCircles(data = uniq,lng = ~long, lat = ~lat)
uniq_plot
# Unique, lat or lon only
data <- subset(fuk_ll, !duplicated(fuk_ll[,7]))
dim(data)
## [1] 80 7
View(data)
uniq_plot <- leaflet() %>%
addTiles()%>%
addCircles(data = data,lng = ~long, lat = ~lat)
uniq_plot
The Fukushima Nuclear Radiations is multi dimensional;
The above dimensionalities coupled with distance,population density and land use, create a data set that can be run on extensive machine learning algorithms like Support Vector Machines, Random Forest, Recurrent Neural Networks and more.
end